In [5]:
# Importing the necessary libraries

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
In [6]:
# Assigning name to the dataset and calling the dataset
dataset1 = pd.read_csv("C:\\Users\\HP\\Downloads\\GL_files\\project_oct_new\\parkinsons.csv")
In [7]:
dataset1  # Printing the dataset 
Out[7]:
name MDVP:Fo(Hz) MDVP:Fhi(Hz) MDVP:Flo(Hz) MDVP:Jitter(%) MDVP:Jitter(Abs) MDVP:RAP MDVP:PPQ Jitter:DDP MDVP:Shimmer ... Shimmer:DDA NHR HNR status RPDE DFA spread1 spread2 D2 PPE
0 phon_R01_S01_1 119.992 157.302 74.997 0.00784 0.00007 0.00370 0.00554 0.01109 0.04374 ... 0.06545 0.02211 21.033 1 0.414783 0.815285 -4.813031 0.266482 2.301442 0.284654
1 phon_R01_S01_2 122.400 148.650 113.819 0.00968 0.00008 0.00465 0.00696 0.01394 0.06134 ... 0.09403 0.01929 19.085 1 0.458359 0.819521 -4.075192 0.335590 2.486855 0.368674
2 phon_R01_S01_3 116.682 131.111 111.555 0.01050 0.00009 0.00544 0.00781 0.01633 0.05233 ... 0.08270 0.01309 20.651 1 0.429895 0.825288 -4.443179 0.311173 2.342259 0.332634
3 phon_R01_S01_4 116.676 137.871 111.366 0.00997 0.00009 0.00502 0.00698 0.01505 0.05492 ... 0.08771 0.01353 20.644 1 0.434969 0.819235 -4.117501 0.334147 2.405554 0.368975
4 phon_R01_S01_5 116.014 141.781 110.655 0.01284 0.00011 0.00655 0.00908 0.01966 0.06425 ... 0.10470 0.01767 19.649 1 0.417356 0.823484 -3.747787 0.234513 2.332180 0.410335
5 phon_R01_S01_6 120.552 131.162 113.787 0.00968 0.00008 0.00463 0.00750 0.01388 0.04701 ... 0.06985 0.01222 21.378 1 0.415564 0.825069 -4.242867 0.299111 2.187560 0.357775
6 phon_R01_S02_1 120.267 137.244 114.820 0.00333 0.00003 0.00155 0.00202 0.00466 0.01608 ... 0.02337 0.00607 24.886 1 0.596040 0.764112 -5.634322 0.257682 1.854785 0.211756
7 phon_R01_S02_2 107.332 113.840 104.315 0.00290 0.00003 0.00144 0.00182 0.00431 0.01567 ... 0.02487 0.00344 26.892 1 0.637420 0.763262 -6.167603 0.183721 2.064693 0.163755
8 phon_R01_S02_3 95.730 132.068 91.754 0.00551 0.00006 0.00293 0.00332 0.00880 0.02093 ... 0.03218 0.01070 21.812 1 0.615551 0.773587 -5.498678 0.327769 2.322511 0.231571
9 phon_R01_S02_4 95.056 120.103 91.226 0.00532 0.00006 0.00268 0.00332 0.00803 0.02838 ... 0.04324 0.01022 21.862 1 0.547037 0.798463 -5.011879 0.325996 2.432792 0.271362
10 phon_R01_S02_5 88.333 112.240 84.072 0.00505 0.00006 0.00254 0.00330 0.00763 0.02143 ... 0.03237 0.01166 21.118 1 0.611137 0.776156 -5.249770 0.391002 2.407313 0.249740
11 phon_R01_S02_6 91.904 115.871 86.292 0.00540 0.00006 0.00281 0.00336 0.00844 0.02752 ... 0.04272 0.01141 21.414 1 0.583390 0.792520 -4.960234 0.363566 2.642476 0.275931
12 phon_R01_S04_1 136.926 159.866 131.276 0.00293 0.00002 0.00118 0.00153 0.00355 0.01259 ... 0.01968 0.00581 25.703 1 0.460600 0.646846 -6.547148 0.152813 2.041277 0.138512
13 phon_R01_S04_2 139.173 179.139 76.556 0.00390 0.00003 0.00165 0.00208 0.00496 0.01642 ... 0.02184 0.01041 24.889 1 0.430166 0.665833 -5.660217 0.254989 2.519422 0.199889
14 phon_R01_S04_3 152.845 163.305 75.836 0.00294 0.00002 0.00121 0.00149 0.00364 0.01828 ... 0.03191 0.00609 24.922 1 0.474791 0.654027 -6.105098 0.203653 2.125618 0.170100
15 phon_R01_S04_4 142.167 217.455 83.159 0.00369 0.00003 0.00157 0.00203 0.00471 0.01503 ... 0.02316 0.00839 25.175 1 0.565924 0.658245 -5.340115 0.210185 2.205546 0.234589
16 phon_R01_S04_5 144.188 349.259 82.764 0.00544 0.00004 0.00211 0.00292 0.00632 0.02047 ... 0.02908 0.01859 22.333 1 0.567380 0.644692 -5.440040 0.239764 2.264501 0.218164
17 phon_R01_S04_6 168.778 232.181 75.603 0.00718 0.00004 0.00284 0.00387 0.00853 0.03327 ... 0.04322 0.02919 20.376 1 0.631099 0.605417 -2.931070 0.434326 3.007463 0.430788
18 phon_R01_S05_1 153.046 175.829 68.623 0.00742 0.00005 0.00364 0.00432 0.01092 0.05517 ... 0.07413 0.03160 17.280 1 0.665318 0.719467 -3.949079 0.357870 3.109010 0.377429
19 phon_R01_S05_2 156.405 189.398 142.822 0.00768 0.00005 0.00372 0.00399 0.01116 0.03995 ... 0.05164 0.03365 17.153 1 0.649554 0.686080 -4.554466 0.340176 2.856676 0.322111
20 phon_R01_S05_3 153.848 165.738 65.782 0.00840 0.00005 0.00428 0.00450 0.01285 0.03810 ... 0.05000 0.03871 17.536 1 0.660125 0.704087 -4.095442 0.262564 2.739710 0.365391
21 phon_R01_S05_4 153.880 172.860 78.128 0.00480 0.00003 0.00232 0.00267 0.00696 0.04137 ... 0.06062 0.01849 19.493 1 0.629017 0.698951 -5.186960 0.237622 2.557536 0.259765
22 phon_R01_S05_5 167.930 193.221 79.068 0.00442 0.00003 0.00220 0.00247 0.00661 0.04351 ... 0.06685 0.01280 22.468 1 0.619060 0.679834 -4.330956 0.262384 2.916777 0.285695
23 phon_R01_S05_6 173.917 192.735 86.180 0.00476 0.00003 0.00221 0.00258 0.00663 0.04192 ... 0.06562 0.01840 20.422 1 0.537264 0.686894 -5.248776 0.210279 2.547508 0.253556
24 phon_R01_S06_1 163.656 200.841 76.779 0.00742 0.00005 0.00380 0.00390 0.01140 0.01659 ... 0.02214 0.01778 23.831 1 0.397937 0.732479 -5.557447 0.220890 2.692176 0.215961
25 phon_R01_S06_2 104.400 206.002 77.968 0.00633 0.00006 0.00316 0.00375 0.00948 0.03767 ... 0.05197 0.02887 22.066 1 0.522746 0.737948 -5.571843 0.236853 2.846369 0.219514
26 phon_R01_S06_3 171.041 208.313 75.501 0.00455 0.00003 0.00250 0.00234 0.00750 0.01966 ... 0.02666 0.01095 25.908 1 0.418622 0.720916 -6.183590 0.226278 2.589702 0.147403
27 phon_R01_S06_4 146.845 208.701 81.737 0.00496 0.00003 0.00250 0.00275 0.00749 0.01919 ... 0.02650 0.01328 25.119 1 0.358773 0.726652 -6.271690 0.196102 2.314209 0.162999
28 phon_R01_S06_5 155.358 227.383 80.055 0.00310 0.00002 0.00159 0.00176 0.00476 0.01718 ... 0.02307 0.00677 25.970 1 0.470478 0.676258 -7.120925 0.279789 2.241742 0.108514
29 phon_R01_S06_6 162.568 198.346 77.630 0.00502 0.00003 0.00280 0.00253 0.00841 0.01791 ... 0.02380 0.01170 25.678 1 0.427785 0.723797 -6.635729 0.209866 1.957961 0.135242
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
165 phon_R01_S42_1 236.200 244.663 102.137 0.00277 0.00001 0.00154 0.00153 0.00462 0.02448 ... 0.04231 0.00620 24.078 0 0.469928 0.628232 -6.816086 0.172270 2.235197 0.119652
166 phon_R01_S42_2 237.323 243.709 229.256 0.00303 0.00001 0.00173 0.00159 0.00519 0.01242 ... 0.02089 0.00533 24.679 0 0.384868 0.626710 -7.018057 0.176316 1.852402 0.091604
167 phon_R01_S42_3 260.105 264.919 237.303 0.00339 0.00001 0.00205 0.00186 0.00616 0.02030 ... 0.03557 0.00910 21.083 0 0.440988 0.628058 -7.517934 0.160414 1.881767 0.075587
168 phon_R01_S42_4 197.569 217.627 90.794 0.00803 0.00004 0.00490 0.00448 0.01470 0.02177 ... 0.03836 0.01337 19.269 0 0.372222 0.725216 -5.736781 0.164529 2.882450 0.202879
169 phon_R01_S42_5 240.301 245.135 219.783 0.00517 0.00002 0.00316 0.00283 0.00949 0.02018 ... 0.03529 0.00965 21.020 0 0.371837 0.646167 -7.169701 0.073298 2.266432 0.100881
170 phon_R01_S42_6 244.990 272.210 239.170 0.00451 0.00002 0.00279 0.00237 0.00837 0.01897 ... 0.03253 0.01049 21.528 0 0.522812 0.646818 -7.304500 0.171088 2.095237 0.096220
171 phon_R01_S43_1 112.547 133.374 105.715 0.00355 0.00003 0.00166 0.00190 0.00499 0.01358 ... 0.01992 0.00435 26.436 0 0.413295 0.756700 -6.323531 0.218885 2.193412 0.160376
172 phon_R01_S43_2 110.739 113.597 100.139 0.00356 0.00003 0.00170 0.00200 0.00510 0.01484 ... 0.02261 0.00430 26.550 0 0.369090 0.776158 -6.085567 0.192375 1.889002 0.174152
173 phon_R01_S43_3 113.715 116.443 96.913 0.00349 0.00003 0.00171 0.00203 0.00514 0.01472 ... 0.02245 0.00478 26.547 0 0.380253 0.766700 -5.943501 0.192150 1.852542 0.179677
174 phon_R01_S43_4 117.004 144.466 99.923 0.00353 0.00003 0.00176 0.00218 0.00528 0.01657 ... 0.02643 0.00590 25.445 0 0.387482 0.756482 -6.012559 0.229298 1.872946 0.163118
175 phon_R01_S43_5 115.380 123.109 108.634 0.00332 0.00003 0.00160 0.00199 0.00480 0.01503 ... 0.02436 0.00401 26.005 0 0.405991 0.761255 -5.966779 0.197938 1.974857 0.184067
176 phon_R01_S43_6 116.388 129.038 108.970 0.00346 0.00003 0.00169 0.00213 0.00507 0.01725 ... 0.02623 0.00415 26.143 0 0.361232 0.763242 -6.016891 0.109256 2.004719 0.174429
177 phon_R01_S44_1 151.737 190.204 129.859 0.00314 0.00002 0.00135 0.00162 0.00406 0.01469 ... 0.02184 0.00570 24.151 1 0.396610 0.745957 -6.486822 0.197919 2.449763 0.132703
178 phon_R01_S44_2 148.790 158.359 138.990 0.00309 0.00002 0.00152 0.00186 0.00456 0.01574 ... 0.02518 0.00488 24.412 1 0.402591 0.762508 -6.311987 0.182459 2.251553 0.160306
179 phon_R01_S44_3 148.143 155.982 135.041 0.00392 0.00003 0.00204 0.00231 0.00612 0.01450 ... 0.02175 0.00540 23.683 1 0.398499 0.778349 -5.711205 0.240875 2.845109 0.192730
180 phon_R01_S44_4 150.440 163.441 144.736 0.00396 0.00003 0.00206 0.00233 0.00619 0.02551 ... 0.03964 0.00611 23.133 1 0.352396 0.759320 -6.261446 0.183218 2.264226 0.144105
181 phon_R01_S44_5 148.462 161.078 141.998 0.00397 0.00003 0.00202 0.00235 0.00605 0.01831 ... 0.02849 0.00639 22.866 1 0.408598 0.768845 -5.704053 0.216204 2.679185 0.197710
182 phon_R01_S44_6 149.818 163.417 144.786 0.00336 0.00002 0.00174 0.00198 0.00521 0.02145 ... 0.03464 0.00595 23.008 1 0.329577 0.757180 -6.277170 0.109397 2.209021 0.156368
183 phon_R01_S49_1 117.226 123.925 106.656 0.00417 0.00004 0.00186 0.00270 0.00558 0.01909 ... 0.02592 0.00955 23.079 0 0.603515 0.669565 -5.619070 0.191576 2.027228 0.215724
184 phon_R01_S49_2 116.848 217.552 99.503 0.00531 0.00005 0.00260 0.00346 0.00780 0.01795 ... 0.02429 0.01179 22.085 0 0.663842 0.656516 -5.198864 0.206768 2.120412 0.252404
185 phon_R01_S49_3 116.286 177.291 96.983 0.00314 0.00003 0.00134 0.00192 0.00403 0.01564 ... 0.02001 0.00737 24.199 0 0.598515 0.654331 -5.592584 0.133917 2.058658 0.214346
186 phon_R01_S49_4 116.556 592.030 86.228 0.00496 0.00004 0.00254 0.00263 0.00762 0.01660 ... 0.02460 0.01397 23.958 0 0.566424 0.667654 -6.431119 0.153310 2.161936 0.120605
187 phon_R01_S49_5 116.342 581.289 94.246 0.00267 0.00002 0.00115 0.00148 0.00345 0.01300 ... 0.01892 0.00680 25.023 0 0.528485 0.663884 -6.359018 0.116636 2.152083 0.138868
188 phon_R01_S49_6 114.563 119.167 86.647 0.00327 0.00003 0.00146 0.00184 0.00439 0.01185 ... 0.01672 0.00703 24.775 0 0.555303 0.659132 -6.710219 0.149694 1.913990 0.121777
189 phon_R01_S50_1 201.774 262.707 78.228 0.00694 0.00003 0.00412 0.00396 0.01235 0.02574 ... 0.04363 0.04441 19.368 0 0.508479 0.683761 -6.934474 0.159890 2.316346 0.112838
190 phon_R01_S50_2 174.188 230.978 94.261 0.00459 0.00003 0.00263 0.00259 0.00790 0.04087 ... 0.07008 0.02764 19.517 0 0.448439 0.657899 -6.538586 0.121952 2.657476 0.133050
191 phon_R01_S50_3 209.516 253.017 89.488 0.00564 0.00003 0.00331 0.00292 0.00994 0.02751 ... 0.04812 0.01810 19.147 0 0.431674 0.683244 -6.195325 0.129303 2.784312 0.168895
192 phon_R01_S50_4 174.688 240.005 74.287 0.01360 0.00008 0.00624 0.00564 0.01873 0.02308 ... 0.03804 0.10715 17.883 0 0.407567 0.655683 -6.787197 0.158453 2.679772 0.131728
193 phon_R01_S50_5 198.764 396.961 74.904 0.00740 0.00004 0.00370 0.00390 0.01109 0.02296 ... 0.03794 0.07223 19.020 0 0.451221 0.643956 -6.744577 0.207454 2.138608 0.123306
194 phon_R01_S50_6 214.289 260.277 77.973 0.00567 0.00003 0.00295 0.00317 0.00885 0.01884 ... 0.03078 0.04398 21.209 0 0.462803 0.664357 -5.724056 0.190667 2.555477 0.148569

195 rows × 24 columns

In [8]:
# for features in dataset1.columns:
#     if dataset1[features].dtype == 'object':
#         dataset1[features] = pd.Categorical(dataset1[features]).codes
In [9]:
dataset2= dataset1.drop(['name'],axis =1)
In [10]:
dataset2.shape  # Analysis on the dimension of the dataset
Out[10]:
(195, 23)
In [11]:
dataset2.dtypes  # Analysing the dataset datatypes 

# There are int and float datatypes 
# No object datatypes are being present 
Out[11]:
MDVP:Fo(Hz)         float64
MDVP:Fhi(Hz)        float64
MDVP:Flo(Hz)        float64
MDVP:Jitter(%)      float64
MDVP:Jitter(Abs)    float64
MDVP:RAP            float64
MDVP:PPQ            float64
Jitter:DDP          float64
MDVP:Shimmer        float64
MDVP:Shimmer(dB)    float64
Shimmer:APQ3        float64
Shimmer:APQ5        float64
MDVP:APQ            float64
Shimmer:DDA         float64
NHR                 float64
HNR                 float64
status                int64
RPDE                float64
DFA                 float64
spread1             float64
spread2             float64
D2                  float64
PPE                 float64
dtype: object
In [12]:
dataset2.isnull().sum()     #Code to check whether any missing values are present on the dataset
                            #  and to get it treated
Out[12]:
MDVP:Fo(Hz)         0
MDVP:Fhi(Hz)        0
MDVP:Flo(Hz)        0
MDVP:Jitter(%)      0
MDVP:Jitter(Abs)    0
MDVP:RAP            0
MDVP:PPQ            0
Jitter:DDP          0
MDVP:Shimmer        0
MDVP:Shimmer(dB)    0
Shimmer:APQ3        0
Shimmer:APQ5        0
MDVP:APQ            0
Shimmer:DDA         0
NHR                 0
HNR                 0
status              0
RPDE                0
DFA                 0
spread1             0
spread2             0
D2                  0
PPE                 0
dtype: int64
In [13]:
dataset2.head(10)   # inital 10 head datas of the Parkinsons dataset to understand the value distribution 
Out[13]:
MDVP:Fo(Hz) MDVP:Fhi(Hz) MDVP:Flo(Hz) MDVP:Jitter(%) MDVP:Jitter(Abs) MDVP:RAP MDVP:PPQ Jitter:DDP MDVP:Shimmer MDVP:Shimmer(dB) ... Shimmer:DDA NHR HNR status RPDE DFA spread1 spread2 D2 PPE
0 119.992 157.302 74.997 0.00784 0.00007 0.00370 0.00554 0.01109 0.04374 0.426 ... 0.06545 0.02211 21.033 1 0.414783 0.815285 -4.813031 0.266482 2.301442 0.284654
1 122.400 148.650 113.819 0.00968 0.00008 0.00465 0.00696 0.01394 0.06134 0.626 ... 0.09403 0.01929 19.085 1 0.458359 0.819521 -4.075192 0.335590 2.486855 0.368674
2 116.682 131.111 111.555 0.01050 0.00009 0.00544 0.00781 0.01633 0.05233 0.482 ... 0.08270 0.01309 20.651 1 0.429895 0.825288 -4.443179 0.311173 2.342259 0.332634
3 116.676 137.871 111.366 0.00997 0.00009 0.00502 0.00698 0.01505 0.05492 0.517 ... 0.08771 0.01353 20.644 1 0.434969 0.819235 -4.117501 0.334147 2.405554 0.368975
4 116.014 141.781 110.655 0.01284 0.00011 0.00655 0.00908 0.01966 0.06425 0.584 ... 0.10470 0.01767 19.649 1 0.417356 0.823484 -3.747787 0.234513 2.332180 0.410335
5 120.552 131.162 113.787 0.00968 0.00008 0.00463 0.00750 0.01388 0.04701 0.456 ... 0.06985 0.01222 21.378 1 0.415564 0.825069 -4.242867 0.299111 2.187560 0.357775
6 120.267 137.244 114.820 0.00333 0.00003 0.00155 0.00202 0.00466 0.01608 0.140 ... 0.02337 0.00607 24.886 1 0.596040 0.764112 -5.634322 0.257682 1.854785 0.211756
7 107.332 113.840 104.315 0.00290 0.00003 0.00144 0.00182 0.00431 0.01567 0.134 ... 0.02487 0.00344 26.892 1 0.637420 0.763262 -6.167603 0.183721 2.064693 0.163755
8 95.730 132.068 91.754 0.00551 0.00006 0.00293 0.00332 0.00880 0.02093 0.191 ... 0.03218 0.01070 21.812 1 0.615551 0.773587 -5.498678 0.327769 2.322511 0.231571
9 95.056 120.103 91.226 0.00532 0.00006 0.00268 0.00332 0.00803 0.02838 0.255 ... 0.04324 0.01022 21.862 1 0.547037 0.798463 -5.011879 0.325996 2.432792 0.271362

10 rows × 23 columns

In [14]:
dataset2.describe()    # describe function could provide the distribution of the values of the dataset to study the mean, median and outliers presented on the dataset
Out[14]:
MDVP:Fo(Hz) MDVP:Fhi(Hz) MDVP:Flo(Hz) MDVP:Jitter(%) MDVP:Jitter(Abs) MDVP:RAP MDVP:PPQ Jitter:DDP MDVP:Shimmer MDVP:Shimmer(dB) ... Shimmer:DDA NHR HNR status RPDE DFA spread1 spread2 D2 PPE
count 195.000000 195.000000 195.000000 195.000000 195.000000 195.000000 195.000000 195.000000 195.000000 195.000000 ... 195.000000 195.000000 195.000000 195.000000 195.000000 195.000000 195.000000 195.000000 195.000000 195.000000
mean 154.228641 197.104918 116.324631 0.006220 0.000044 0.003306 0.003446 0.009920 0.029709 0.282251 ... 0.046993 0.024847 21.885974 0.753846 0.498536 0.718099 -5.684397 0.226510 2.381826 0.206552
std 41.390065 91.491548 43.521413 0.004848 0.000035 0.002968 0.002759 0.008903 0.018857 0.194877 ... 0.030459 0.040418 4.425764 0.431878 0.103942 0.055336 1.090208 0.083406 0.382799 0.090119
min 88.333000 102.145000 65.476000 0.001680 0.000007 0.000680 0.000920 0.002040 0.009540 0.085000 ... 0.013640 0.000650 8.441000 0.000000 0.256570 0.574282 -7.964984 0.006274 1.423287 0.044539
25% 117.572000 134.862500 84.291000 0.003460 0.000020 0.001660 0.001860 0.004985 0.016505 0.148500 ... 0.024735 0.005925 19.198000 1.000000 0.421306 0.674758 -6.450096 0.174351 2.099125 0.137451
50% 148.790000 175.829000 104.315000 0.004940 0.000030 0.002500 0.002690 0.007490 0.022970 0.221000 ... 0.038360 0.011660 22.085000 1.000000 0.495954 0.722254 -5.720868 0.218885 2.361532 0.194052
75% 182.769000 224.205500 140.018500 0.007365 0.000060 0.003835 0.003955 0.011505 0.037885 0.350000 ... 0.060795 0.025640 25.075500 1.000000 0.587562 0.761881 -5.046192 0.279234 2.636456 0.252980
max 260.105000 592.030000 239.170000 0.033160 0.000260 0.021440 0.019580 0.064330 0.119080 1.302000 ... 0.169420 0.314820 33.047000 1.000000 0.685151 0.825288 -2.434031 0.450493 3.671155 0.527367

8 rows × 23 columns

In [15]:
figure1=plt.figure(figsize = [20,20])        # plt.figure for the figure creation
ax= figure1.gca()                            # This code will get axes instance on the current figure matching the keyword argws
dataset2.hist(ax=ax)                         # The figure procedures are passed as the arguments to the histogram plot

                                             # The histogram plot is being applied to the dataset1
plt.show()                                   # For showing all the plots, this function being called
C:\Users\HP\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py:3296: UserWarning: To output multiple subplots, the figure containing the passed axes is being cleared
  exec(code_obj, self.user_global_ns, self.user_ns)
In [16]:
dataset2.isnull().any()  # any() will return the boolean values of the columns on the dataset1
Out[16]:
MDVP:Fo(Hz)         False
MDVP:Fhi(Hz)        False
MDVP:Flo(Hz)        False
MDVP:Jitter(%)      False
MDVP:Jitter(Abs)    False
MDVP:RAP            False
MDVP:PPQ            False
Jitter:DDP          False
MDVP:Shimmer        False
MDVP:Shimmer(dB)    False
Shimmer:APQ3        False
Shimmer:APQ5        False
MDVP:APQ            False
Shimmer:DDA         False
NHR                 False
HNR                 False
status              False
RPDE                False
DFA                 False
spread1             False
spread2             False
D2                  False
PPE                 False
dtype: bool
In [17]:
dataset2.columns
Out[17]:
Index(['MDVP:Fo(Hz)', 'MDVP:Fhi(Hz)', 'MDVP:Flo(Hz)', 'MDVP:Jitter(%)',
       'MDVP:Jitter(Abs)', 'MDVP:RAP', 'MDVP:PPQ', 'Jitter:DDP',
       'MDVP:Shimmer', 'MDVP:Shimmer(dB)', 'Shimmer:APQ3', 'Shimmer:APQ5',
       'MDVP:APQ', 'Shimmer:DDA', 'NHR', 'HNR', 'status', 'RPDE', 'DFA',
       'spread1', 'spread2', 'D2', 'PPE'],
      dtype='object')
In [18]:
dataset2.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 195 entries, 0 to 194
Data columns (total 23 columns):
MDVP:Fo(Hz)         195 non-null float64
MDVP:Fhi(Hz)        195 non-null float64
MDVP:Flo(Hz)        195 non-null float64
MDVP:Jitter(%)      195 non-null float64
MDVP:Jitter(Abs)    195 non-null float64
MDVP:RAP            195 non-null float64
MDVP:PPQ            195 non-null float64
Jitter:DDP          195 non-null float64
MDVP:Shimmer        195 non-null float64
MDVP:Shimmer(dB)    195 non-null float64
Shimmer:APQ3        195 non-null float64
Shimmer:APQ5        195 non-null float64
MDVP:APQ            195 non-null float64
Shimmer:DDA         195 non-null float64
NHR                 195 non-null float64
HNR                 195 non-null float64
status              195 non-null int64
RPDE                195 non-null float64
DFA                 195 non-null float64
spread1             195 non-null float64
spread2             195 non-null float64
D2                  195 non-null float64
PPE                 195 non-null float64
dtypes: float64(22), int64(1)
memory usage: 35.1 KB
In [19]:
dataset2["status"].unique()
Out[19]:
array([1, 0], dtype=int64)
In [20]:
dataset2.status.value_counts()

# dataset1["status"].value_counts() --> this is also prooduce the above results
Out[20]:
1    147
0     48
Name: status, dtype: int64
In [21]:
dataset2[dataset2['status']==1].describe()  #Study for the people with chance of Parkinsons disease
Out[21]:
MDVP:Fo(Hz) MDVP:Fhi(Hz) MDVP:Flo(Hz) MDVP:Jitter(%) MDVP:Jitter(Abs) MDVP:RAP MDVP:PPQ Jitter:DDP MDVP:Shimmer MDVP:Shimmer(dB) ... Shimmer:DDA NHR HNR status RPDE DFA spread1 spread2 D2 PPE
count 147.000000 147.000000 147.000000 147.000000 147.000000 147.000000 147.000000 147.000000 147.000000 147.000000 ... 147.000000 147.000000 147.000000 147.0 147.000000 147.000000 147.000000 147.000000 147.000000 147.000000
mean 145.180762 188.441463 106.893558 0.006989 0.000051 0.003757 0.003900 0.011273 0.033658 0.321204 ... 0.053027 0.029211 20.974048 1.0 0.516816 0.725408 -5.333420 0.248133 2.456058 0.233828
std 32.348050 88.339180 32.274358 0.005240 0.000037 0.003241 0.002998 0.009724 0.019970 0.207798 ... 0.032391 0.044447 4.339143 0.0 0.101254 0.054786 0.970792 0.077809 0.375742 0.084271
min 88.333000 102.145000 65.476000 0.001680 0.000010 0.000680 0.000920 0.002040 0.010220 0.090000 ... 0.013640 0.002310 8.441000 1.0 0.263654 0.574282 -7.120925 0.063412 1.765957 0.093193
25% 117.572000 133.776500 80.875500 0.004005 0.000030 0.002030 0.002190 0.006085 0.018295 0.168000 ... 0.027400 0.008445 18.782000 1.0 0.439064 0.685569 -6.038300 0.199507 2.180933 0.170103
50% 145.174000 163.335000 99.770000 0.005440 0.000040 0.002840 0.003140 0.008530 0.028380 0.263000 ... 0.044510 0.016580 21.414000 1.0 0.530529 0.726652 -5.440040 0.240875 2.439597 0.222716
75% 170.071000 207.160500 129.240000 0.007670 0.000060 0.004100 0.004360 0.012300 0.042525 0.394500 ... 0.068455 0.027960 24.164500 1.0 0.604573 0.764868 -4.664067 0.303660 2.668479 0.274397
max 223.361000 588.518000 199.020000 0.033160 0.000260 0.021440 0.019580 0.064330 0.119080 1.302000 ... 0.169420 0.314820 29.928000 1.0 0.685151 0.825288 -2.434031 0.450493 3.671155 0.527367

8 rows × 23 columns

In [22]:
dataset2[dataset2['status']==0].describe() #Study for the people without of Parkinsons disease
Out[22]:
MDVP:Fo(Hz) MDVP:Fhi(Hz) MDVP:Flo(Hz) MDVP:Jitter(%) MDVP:Jitter(Abs) MDVP:RAP MDVP:PPQ Jitter:DDP MDVP:Shimmer MDVP:Shimmer(dB) ... Shimmer:DDA NHR HNR status RPDE DFA spread1 spread2 D2 PPE
count 48.000000 48.000000 48.000000 48.000000 48.000000 48.000000 48.000000 48.000000 48.000000 48.000000 ... 48.000000 48.000000 48.00000 48.0 48.000000 48.000000 48.000000 48.000000 48.000000 48.000000
mean 181.937771 223.636750 145.207292 0.003866 0.000023 0.001925 0.002056 0.005776 0.017615 0.162958 ... 0.028511 0.011483 24.67875 0.0 0.442552 0.695716 -6.759264 0.160292 2.154491 0.123017
std 52.731067 96.727067 58.757070 0.002055 0.000015 0.001066 0.000943 0.003199 0.005544 0.057822 ... 0.010368 0.019088 3.43454 0.0 0.092199 0.051346 0.642782 0.062982 0.310269 0.044820
min 110.739000 113.597000 74.287000 0.001780 0.000007 0.000920 0.001060 0.002760 0.009540 0.085000 ... 0.014030 0.000650 17.88300 0.0 0.256570 0.626710 -7.964984 0.006274 1.423287 0.044539
25% 120.947500 139.413250 98.243750 0.002655 0.000010 0.001332 0.001480 0.003998 0.014475 0.129000 ... 0.022060 0.004188 22.99325 0.0 0.372126 0.654291 -7.257665 0.120623 1.974217 0.094658
50% 198.996000 231.161500 113.938500 0.003355 0.000025 0.001625 0.001775 0.004875 0.016705 0.154000 ... 0.026330 0.004825 24.99700 0.0 0.435368 0.682527 -6.826448 0.167356 2.129510 0.115119
75% 229.077000 251.239250 199.183000 0.004530 0.000030 0.001907 0.002228 0.005725 0.020210 0.189250 ... 0.034540 0.009213 26.13925 0.0 0.507748 0.742284 -6.350146 0.193766 2.339487 0.147761
max 260.105000 592.030000 239.170000 0.013600 0.000080 0.006240 0.005640 0.018730 0.040870 0.405000 ... 0.070080 0.107150 33.04700 0.0 0.663842 0.785714 -5.198864 0.291954 2.882450 0.252404

8 rows × 23 columns

In [23]:
dataset2.groupby(by=['status']).count()  # Another way of finding the unique values on the python
print(dataset2.groupby(by=['status']).mean())

# Groupby() is the concept used,when dealing with large types of the values (binding the rows into groups depending in the features on the dataset) 
        MDVP:Fo(Hz)  MDVP:Fhi(Hz)  MDVP:Flo(Hz)  MDVP:Jitter(%)  \
status                                                            
0        181.937771    223.636750    145.207292        0.003866   
1        145.180762    188.441463    106.893558        0.006989   

        MDVP:Jitter(Abs)  MDVP:RAP  MDVP:PPQ  Jitter:DDP  MDVP:Shimmer  \
status                                                                   
0               0.000023  0.001925  0.002056    0.005776      0.017615   
1               0.000051  0.003757  0.003900    0.011273      0.033658   

        MDVP:Shimmer(dB)  ...  MDVP:APQ  Shimmer:DDA       NHR        HNR  \
status                    ...                                               
0               0.162958  ...  0.013305     0.028511  0.011483  24.678750   
1               0.321204  ...  0.027600     0.053027  0.029211  20.974048   

            RPDE       DFA   spread1   spread2        D2       PPE  
status                                                              
0       0.442552  0.695716 -6.759264  0.160292  2.154491  0.123017  
1       0.516816  0.725408 -5.333420  0.248133  2.456058  0.233828  

[2 rows x 22 columns]
In [24]:
dataset1.groupby(by=['status']).mean()   # Mean study based on the status 
Out[24]:
MDVP:Fo(Hz) MDVP:Fhi(Hz) MDVP:Flo(Hz) MDVP:Jitter(%) MDVP:Jitter(Abs) MDVP:RAP MDVP:PPQ Jitter:DDP MDVP:Shimmer MDVP:Shimmer(dB) ... MDVP:APQ Shimmer:DDA NHR HNR RPDE DFA spread1 spread2 D2 PPE
status
0 181.937771 223.636750 145.207292 0.003866 0.000023 0.001925 0.002056 0.005776 0.017615 0.162958 ... 0.013305 0.028511 0.011483 24.678750 0.442552 0.695716 -6.759264 0.160292 2.154491 0.123017
1 145.180762 188.441463 106.893558 0.006989 0.000051 0.003757 0.003900 0.011273 0.033658 0.321204 ... 0.027600 0.053027 0.029211 20.974048 0.516816 0.725408 -5.333420 0.248133 2.456058 0.233828

2 rows × 22 columns

In [25]:
dataset2.groupby(by=['status']).median() # Median study based on the status 
Out[25]:
MDVP:Fo(Hz) MDVP:Fhi(Hz) MDVP:Flo(Hz) MDVP:Jitter(%) MDVP:Jitter(Abs) MDVP:RAP MDVP:PPQ Jitter:DDP MDVP:Shimmer MDVP:Shimmer(dB) ... MDVP:APQ Shimmer:DDA NHR HNR RPDE DFA spread1 spread2 D2 PPE
status
0 198.996 231.1615 113.9385 0.003355 0.000025 0.001625 0.001775 0.004875 0.016705 0.154 ... 0.013015 0.02633 0.004825 24.997 0.435368 0.682527 -6.826448 0.167356 2.129510 0.115119
1 145.174 163.3350 99.7700 0.005440 0.000040 0.002840 0.003140 0.008530 0.028380 0.263 ... 0.021570 0.04451 0.016580 21.414 0.530529 0.726652 -5.440040 0.240875 2.439597 0.222716

2 rows × 22 columns

In [26]:
#Study of the dataset through the graph
sns.pairplot(data= dataset2)
Out[26]:
<seaborn.axisgrid.PairGrid at 0xd9b2828>
In [27]:
sns.pairplot(data= dataset2,hue = "status")
C:\Users\HP\Anaconda3\lib\site-packages\statsmodels\nonparametric\kde.py:488: RuntimeWarning: invalid value encountered in true_divide
  binned = fast_linbin(X, a, b, gridsize) / (delta * nobs)
C:\Users\HP\Anaconda3\lib\site-packages\statsmodels\nonparametric\kdetools.py:34: RuntimeWarning: invalid value encountered in double_scalars
  FAC1 = 2*(np.pi*bw/RANGE)**2
Out[27]:
<seaborn.axisgrid.PairGrid at 0x211900f0>
In [28]:
from sklearn.model_selection import train_test_split
X = dataset2.drop(['status'], axis =1)
#X
#print(X.shape)

y= dataset2['status']
#print(y.shape)                    #(195,)


#y = dataset1[['status','HNR']]    
#print(y.shape)                   # (195, 2)

X_train,X_test,y_train,y_test = train_test_split(X,y,test_size = 0.30)
In [29]:
X_train
Out[29]:
MDVP:Fo(Hz) MDVP:Fhi(Hz) MDVP:Flo(Hz) MDVP:Jitter(%) MDVP:Jitter(Abs) MDVP:RAP MDVP:PPQ Jitter:DDP MDVP:Shimmer MDVP:Shimmer(dB) ... MDVP:APQ Shimmer:DDA NHR HNR RPDE DFA spread1 spread2 D2 PPE
51 126.344 134.231 112.773 0.00448 0.000040 0.00131 0.00169 0.00393 0.02033 0.185 ... 0.01614 0.03429 0.00474 25.030 0.507504 0.760361 -6.689151 0.291954 2.431854 0.105993
158 126.144 154.284 97.543 0.00975 0.000080 0.00593 0.00454 0.01778 0.02852 0.266 ... 0.02157 0.04499 0.03828 21.534 0.635015 0.627337 -5.070096 0.280091 2.892300 0.249703
139 116.150 131.731 109.815 0.00381 0.000030 0.00181 0.00232 0.00542 0.03026 0.267 ... 0.02770 0.04543 0.01827 18.801 0.624811 0.696049 -5.866357 0.233070 2.445646 0.184985
145 223.361 263.872 87.638 0.00352 0.000020 0.00169 0.00188 0.00506 0.02536 0.225 ... 0.01909 0.04137 0.01493 20.366 0.566849 0.574282 -5.456811 0.345238 2.840556 0.232861
118 178.285 442.824 82.063 0.00462 0.000030 0.00157 0.00194 0.00472 0.01279 0.129 ... 0.01151 0.01851 0.00856 25.020 0.470422 0.655239 -4.913137 0.393056 2.816781 0.251972
191 209.516 253.017 89.488 0.00564 0.000030 0.00331 0.00292 0.00994 0.02751 0.263 ... 0.01879 0.04812 0.01810 19.147 0.431674 0.683244 -6.195325 0.129303 2.784312 0.168895
113 210.141 232.706 185.258 0.00534 0.000030 0.00321 0.00280 0.00964 0.01680 0.149 ... 0.01301 0.02583 0.00620 23.671 0.441097 0.722254 -5.963040 0.250283 2.489191 0.177807
73 112.014 588.518 107.024 0.00533 0.000050 0.00268 0.00329 0.00805 0.02448 0.226 ... 0.01956 0.04120 0.00623 24.178 0.509127 0.789532 -5.389129 0.306636 1.928708 0.225461
168 197.569 217.627 90.794 0.00803 0.000040 0.00490 0.00448 0.01470 0.02177 0.189 ... 0.01439 0.03836 0.01337 19.269 0.372222 0.725216 -5.736781 0.164529 2.882450 0.202879
0 119.992 157.302 74.997 0.00784 0.000070 0.00370 0.00554 0.01109 0.04374 0.426 ... 0.02971 0.06545 0.02211 21.033 0.414783 0.815285 -4.813031 0.266482 2.301442 0.284654
178 148.790 158.359 138.990 0.00309 0.000020 0.00152 0.00186 0.00456 0.01574 0.142 ... 0.01309 0.02518 0.00488 24.412 0.402591 0.762508 -6.311987 0.182459 2.251553 0.160306
123 182.018 197.173 79.187 0.00842 0.000050 0.00506 0.00449 0.01517 0.02503 0.231 ... 0.01931 0.04115 0.01813 18.784 0.589956 0.732903 -5.445140 0.142466 2.174306 0.215558
42 237.226 247.326 225.227 0.00298 0.000010 0.00169 0.00182 0.00507 0.01752 0.164 ... 0.01133 0.03104 0.00740 22.736 0.305062 0.654172 -7.310550 0.098648 2.416838 0.095032
57 117.274 129.916 110.402 0.00752 0.000060 0.00299 0.00469 0.00898 0.02293 0.221 ... 0.01948 0.03568 0.00681 22.817 0.530529 0.817756 -4.608260 0.290024 2.021591 0.314464
44 243.439 250.912 232.435 0.00210 0.000009 0.00109 0.00137 0.00327 0.01419 0.126 ... 0.01033 0.02330 0.00454 25.368 0.438296 0.635285 -7.057869 0.091608 2.330716 0.091470
93 152.125 161.469 76.596 0.00382 0.000030 0.00191 0.00226 0.00574 0.05925 0.637 ... 0.04398 0.10024 0.01211 20.969 0.447456 0.697790 -6.152551 0.173520 2.080121 0.160809
45 242.852 255.034 227.911 0.00225 0.000009 0.00117 0.00139 0.00350 0.01494 0.134 ... 0.01014 0.02542 0.00476 25.032 0.431285 0.638928 -6.995820 0.102083 2.365800 0.102706
34 203.184 211.526 196.160 0.00178 0.000009 0.00094 0.00106 0.00283 0.00958 0.085 ... 0.00726 0.01403 0.00065 33.047 0.340068 0.741899 -7.964984 0.163519 1.423287 0.044539
108 151.989 157.339 132.857 0.00174 0.000010 0.00075 0.00096 0.00225 0.01024 0.093 ... 0.00993 0.01364 0.00238 29.928 0.311369 0.676066 -6.739151 0.160686 2.296873 0.115130
97 125.036 143.946 116.187 0.01280 0.000100 0.00743 0.00623 0.02228 0.03886 0.342 ... 0.03088 0.06406 0.08151 15.338 0.629574 0.714485 -4.020042 0.265315 2.671825 0.340623
172 110.739 113.597 100.139 0.00356 0.000030 0.00170 0.00200 0.00510 0.01484 0.133 ... 0.01285 0.02261 0.00430 26.550 0.369090 0.776158 -6.085567 0.192375 1.889002 0.174152
82 100.960 110.019 95.628 0.00606 0.000060 0.00351 0.00348 0.01053 0.02427 0.216 ... 0.01751 0.04114 0.01237 20.536 0.554610 0.787896 -5.022288 0.146948 2.428306 0.264666
11 91.904 115.871 86.292 0.00540 0.000060 0.00281 0.00336 0.00844 0.02752 0.249 ... 0.02214 0.04272 0.01141 21.414 0.583390 0.792520 -4.960234 0.363566 2.642476 0.275931
134 106.516 112.777 93.105 0.00589 0.000060 0.00291 0.00319 0.00873 0.04932 0.441 ... 0.03651 0.08050 0.03031 17.060 0.637814 0.744064 -5.301321 0.320385 2.375138 0.243080
189 201.774 262.707 78.228 0.00694 0.000030 0.00412 0.00396 0.01235 0.02574 0.255 ... 0.01758 0.04363 0.04441 19.368 0.508479 0.683761 -6.934474 0.159890 2.316346 0.112838
127 166.888 198.966 79.512 0.00638 0.000040 0.00368 0.00351 0.01104 0.02857 0.257 ... 0.02301 0.04641 0.01796 18.330 0.585169 0.736964 -5.825257 0.115697 1.996146 0.196535
78 95.385 102.145 90.264 0.00608 0.000060 0.00331 0.00332 0.00994 0.03202 0.263 ... 0.02455 0.05408 0.01062 21.875 0.644954 0.779612 -5.115212 0.249494 2.017753 0.260015
129 120.078 126.632 105.667 0.00270 0.000020 0.00116 0.00135 0.00349 0.01022 0.090 ... 0.00903 0.01428 0.00487 26.369 0.491345 0.718839 -5.892061 0.195976 2.108873 0.183572
185 116.286 177.291 96.983 0.00314 0.000030 0.00134 0.00192 0.00403 0.01564 0.136 ... 0.01691 0.02001 0.00737 24.199 0.598515 0.654331 -5.592584 0.133917 2.058658 0.214346
146 169.774 191.759 151.451 0.01568 0.000090 0.00863 0.00946 0.02589 0.08143 0.821 ... 0.08808 0.11411 0.07530 12.359 0.561610 0.793509 -3.297668 0.414758 3.413649 0.457533
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
64 229.401 252.221 221.156 0.00205 0.000009 0.00114 0.00113 0.00342 0.01457 0.129 ... 0.01016 0.02308 0.00300 26.415 0.276850 0.673636 -7.496264 0.056844 2.003032 0.073581
102 139.224 586.567 66.157 0.03011 0.000220 0.01854 0.01628 0.05563 0.09419 0.930 ... 0.06023 0.16654 0.25930 10.489 0.596362 0.641418 -3.269487 0.270641 2.690917 0.444774
91 151.955 163.335 147.226 0.00419 0.000030 0.00224 0.00227 0.00672 0.07959 0.772 ... 0.05690 0.13262 0.01658 19.664 0.501037 0.714360 -6.411497 0.207156 2.344876 0.134120
61 223.365 238.987 98.664 0.00264 0.000010 0.00154 0.00151 0.00461 0.01906 0.165 ... 0.01340 0.03039 0.00301 26.138 0.447979 0.686264 -7.293801 0.086372 2.321560 0.098555
14 152.845 163.305 75.836 0.00294 0.000020 0.00121 0.00149 0.00364 0.01828 0.158 ... 0.01246 0.03191 0.00609 24.922 0.474791 0.654027 -6.105098 0.203653 2.125618 0.170100
95 157.447 163.267 149.605 0.00369 0.000020 0.00201 0.00197 0.00602 0.03272 0.283 ... 0.02571 0.05439 0.01018 21.693 0.447285 0.705658 -6.247076 0.180528 2.344348 0.164916
67 136.969 166.607 66.004 0.00923 0.000070 0.00507 0.00463 0.01520 0.03111 0.308 ... 0.02603 0.04914 0.02659 19.979 0.498133 0.729067 -5.324574 0.205660 2.291558 0.226247
157 117.963 134.209 100.757 0.01813 0.000150 0.01117 0.00718 0.03351 0.04912 0.438 ... 0.02916 0.07830 0.10748 19.075 0.630547 0.646786 -3.444478 0.303214 2.964568 0.261305
111 208.519 220.315 199.020 0.00609 0.000030 0.00368 0.00339 0.01105 0.01761 0.155 ... 0.01307 0.02855 0.00830 22.407 0.338097 0.712466 -6.471427 0.184378 2.502336 0.136390
138 112.239 126.609 104.095 0.00472 0.000040 0.00238 0.00290 0.00715 0.05643 0.517 ... 0.04451 0.09211 0.02629 17.366 0.640945 0.701404 -5.634576 0.306014 2.419253 0.209191
33 202.266 211.604 197.079 0.00180 0.000009 0.00093 0.00107 0.00278 0.00954 0.085 ... 0.00719 0.01407 0.00072 32.684 0.368535 0.742133 -7.695734 0.178540 1.544609 0.056141
52 128.001 138.052 122.080 0.00436 0.000030 0.00137 0.00166 0.00411 0.02297 0.210 ... 0.01677 0.03969 0.00481 24.692 0.459766 0.766204 -7.072419 0.220434 1.972297 0.119308
75 110.707 122.611 105.007 0.00516 0.000050 0.00277 0.00289 0.00831 0.02215 0.206 ... 0.01715 0.03851 0.00472 25.197 0.463514 0.807217 -5.477592 0.315074 1.862092 0.228624
101 128.451 150.449 75.632 0.01551 0.000120 0.00905 0.00909 0.02716 0.06170 0.584 ... 0.05174 0.09669 0.11843 15.060 0.639808 0.643327 -4.202730 0.310163 2.638279 0.356881
122 138.190 203.522 83.340 0.00704 0.000050 0.00406 0.00398 0.01218 0.04479 0.441 ... 0.03220 0.07761 0.01968 18.305 0.538016 0.741480 -5.418787 0.160267 2.090438 0.229892
144 202.544 241.350 164.168 0.00254 0.000010 0.00100 0.00133 0.00301 0.02662 0.228 ... 0.02006 0.04426 0.01049 20.680 0.497480 0.630409 -6.132663 0.220617 2.576563 0.159777
46 245.510 262.090 231.848 0.00235 0.000010 0.00127 0.00148 0.00380 0.01608 0.141 ... 0.01149 0.02719 0.00476 24.602 0.467489 0.631653 -7.156076 0.127642 2.392122 0.097336
71 136.358 176.595 65.750 0.00971 0.000070 0.00534 0.00478 0.01601 0.04978 0.483 ... 0.03736 0.08247 0.03361 18.570 0.543299 0.733232 -5.207985 0.224852 2.642276 0.242981
124 156.239 195.107 79.820 0.00694 0.000040 0.00403 0.00395 0.01209 0.02343 0.224 ... 0.01720 0.03867 0.02020 19.196 0.618663 0.728421 -5.944191 0.143359 1.929715 0.181988
16 144.188 349.259 82.764 0.00544 0.000040 0.00211 0.00292 0.00632 0.02047 0.192 ... 0.02074 0.02908 0.01859 22.333 0.567380 0.644692 -5.440040 0.239764 2.264501 0.218164
77 110.568 125.394 106.821 0.00462 0.000040 0.00226 0.00280 0.00677 0.02199 0.197 ... 0.01636 0.03852 0.00420 25.820 0.429484 0.816340 -5.391029 0.250572 1.777901 0.232744
3 116.676 137.871 111.366 0.00997 0.000090 0.00502 0.00698 0.01505 0.05492 0.517 ... 0.03772 0.08771 0.01353 20.644 0.434969 0.819235 -4.117501 0.334147 2.405554 0.368975
86 178.222 202.450 141.047 0.00321 0.000020 0.00163 0.00194 0.00488 0.03759 0.327 ... 0.02784 0.06219 0.03151 15.924 0.598714 0.712199 -6.366916 0.335753 2.654271 0.144614
19 156.405 189.398 142.822 0.00768 0.000050 0.00372 0.00399 0.01116 0.03995 0.348 ... 0.04310 0.05164 0.03365 17.153 0.649554 0.686080 -4.554466 0.340176 2.856676 0.322111
68 143.533 162.215 65.809 0.01101 0.000080 0.00647 0.00467 0.01941 0.05384 0.478 ... 0.03392 0.09455 0.04882 20.338 0.513237 0.731444 -5.869750 0.151814 2.118496 0.185580
54 108.807 134.656 102.874 0.00761 0.000070 0.00349 0.00486 0.01046 0.02719 0.255 ... 0.02067 0.04450 0.01036 21.028 0.536009 0.819032 -4.649573 0.205558 1.986899 0.316700
130 120.289 128.143 100.209 0.00492 0.000040 0.00269 0.00238 0.00808 0.01412 0.125 ... 0.01194 0.02110 0.01610 23.949 0.467160 0.724045 -6.135296 0.203630 2.539724 0.169923
104 154.003 160.267 128.621 0.00183 0.000010 0.00076 0.00100 0.00229 0.01030 0.094 ... 0.00871 0.01406 0.00243 28.409 0.263654 0.691483 -7.111576 0.144780 2.065477 0.093193
187 116.342 581.289 94.246 0.00267 0.000020 0.00115 0.00148 0.00345 0.01300 0.117 ... 0.01144 0.01892 0.00680 25.023 0.528485 0.663884 -6.359018 0.116636 2.152083 0.138868
22 167.930 193.221 79.068 0.00442 0.000030 0.00220 0.00247 0.00661 0.04351 0.377 ... 0.04246 0.06685 0.01280 22.468 0.619060 0.679834 -4.330956 0.262384 2.916777 0.285695

136 rows × 22 columns

In [30]:
X_test
Out[30]:
MDVP:Fo(Hz) MDVP:Fhi(Hz) MDVP:Flo(Hz) MDVP:Jitter(%) MDVP:Jitter(Abs) MDVP:RAP MDVP:PPQ Jitter:DDP MDVP:Shimmer MDVP:Shimmer(dB) ... MDVP:APQ Shimmer:DDA NHR HNR RPDE DFA spread1 spread2 D2 PPE
131 120.256 125.306 104.773 0.00407 0.00003 0.00224 0.00205 0.00671 0.01516 0.138 ... 0.01310 0.02164 0.01015 26.017 0.468621 0.735136 -6.112667 0.217013 2.527742 0.170633
60 209.144 237.494 109.379 0.00282 0.00001 0.00147 0.00152 0.00442 0.01861 0.170 ... 0.01382 0.02925 0.00871 25.554 0.341788 0.678874 -7.040508 0.066994 2.460791 0.101516
53 129.336 139.867 118.604 0.00490 0.00004 0.00165 0.00183 0.00495 0.02498 0.228 ... 0.01947 0.04188 0.00484 25.429 0.420383 0.785714 -6.836811 0.269866 2.223719 0.147491
136 113.400 133.344 107.816 0.00451 0.00004 0.00219 0.00283 0.00658 0.04879 0.431 ... 0.04370 0.07154 0.02278 19.013 0.647900 0.708144 -4.378916 0.300067 2.445502 0.259451
181 148.462 161.078 141.998 0.00397 0.00003 0.00202 0.00235 0.00605 0.01831 0.163 ... 0.01559 0.02849 0.00639 22.866 0.408598 0.768845 -5.704053 0.216204 2.679185 0.197710
94 157.821 172.975 68.401 0.00358 0.00002 0.00196 0.00196 0.00587 0.03716 0.307 ... 0.02764 0.06185 0.00850 22.219 0.502380 0.712170 -6.251425 0.188056 2.143851 0.160812
135 110.453 127.611 105.554 0.00494 0.00004 0.00244 0.00315 0.00731 0.04128 0.379 ... 0.03316 0.06688 0.02529 17.707 0.653427 0.706687 -5.333619 0.322044 2.631793 0.228319
192 174.688 240.005 74.287 0.01360 0.00008 0.00624 0.00564 0.01873 0.02308 0.256 ... 0.01667 0.03804 0.10715 17.883 0.407567 0.655683 -6.787197 0.158453 2.679772 0.131728
43 241.404 248.834 232.483 0.00281 0.00001 0.00157 0.00173 0.00470 0.01760 0.154 ... 0.01251 0.03017 0.00675 23.145 0.457702 0.634267 -6.793547 0.158266 2.256699 0.117399
48 122.188 128.611 115.765 0.00524 0.00004 0.00169 0.00203 0.00507 0.01613 0.143 ... 0.01433 0.02566 0.00839 23.162 0.579597 0.733659 -6.439398 0.266392 2.079922 0.133867
27 146.845 208.701 81.737 0.00496 0.00003 0.00250 0.00275 0.00749 0.01919 0.198 ... 0.01826 0.02650 0.01328 25.119 0.358773 0.726652 -6.271690 0.196102 2.314209 0.162999
100 125.641 141.068 116.346 0.03316 0.00026 0.02144 0.01522 0.06433 0.09178 0.891 ... 0.06196 0.16074 0.31482 8.867 0.671299 0.656846 -3.700544 0.260481 2.991063 0.370961
17 168.778 232.181 75.603 0.00718 0.00004 0.00284 0.00387 0.00853 0.03327 0.348 ... 0.03430 0.04322 0.02919 20.376 0.631099 0.605417 -2.931070 0.434326 3.007463 0.430788
143 202.805 231.508 86.232 0.00370 0.00002 0.00189 0.00211 0.00568 0.01997 0.180 ... 0.01506 0.03350 0.02010 18.687 0.536102 0.632631 -5.898673 0.213353 2.470746 0.189032
28 155.358 227.383 80.055 0.00310 0.00002 0.00159 0.00176 0.00476 0.01718 0.161 ... 0.01661 0.02307 0.00677 25.970 0.470478 0.676258 -7.120925 0.279789 2.241742 0.108514
20 153.848 165.738 65.782 0.00840 0.00005 0.00428 0.00450 0.01285 0.03810 0.328 ... 0.04055 0.05000 0.03871 17.536 0.660125 0.704087 -4.095442 0.262564 2.739710 0.365391
85 180.978 200.125 155.495 0.00406 0.00002 0.00220 0.00244 0.00659 0.03852 0.331 ... 0.02877 0.06321 0.02782 16.176 0.583574 0.727747 -5.657899 0.315903 3.098256 0.200423
163 112.150 131.669 97.527 0.00519 0.00005 0.00291 0.00284 0.00873 0.01756 0.155 ... 0.01363 0.02902 0.01435 21.219 0.557045 0.673086 -5.617124 0.184896 1.871871 0.212386
159 127.930 138.752 112.173 0.00605 0.00005 0.00321 0.00318 0.00962 0.03235 0.339 ... 0.03105 0.04079 0.02663 19.651 0.654945 0.675865 -5.498456 0.234196 2.103014 0.216638
18 153.046 175.829 68.623 0.00742 0.00005 0.00364 0.00432 0.01092 0.05517 0.542 ... 0.05767 0.07413 0.03160 17.280 0.665318 0.719467 -3.949079 0.357870 3.109010 0.377429
80 96.106 108.664 84.510 0.00694 0.00007 0.00389 0.00415 0.01168 0.04024 0.364 ... 0.02876 0.06799 0.01823 19.055 0.544805 0.770466 -4.441519 0.155097 2.645959 0.327978
58 116.879 131.897 108.153 0.00788 0.00007 0.00334 0.00493 0.01003 0.02645 0.265 ... 0.02137 0.04183 0.00786 22.603 0.540049 0.813432 -4.476755 0.262633 1.827012 0.326197
182 149.818 163.417 144.786 0.00336 0.00002 0.00174 0.00198 0.00521 0.02145 0.198 ... 0.01666 0.03464 0.00595 23.008 0.329577 0.757180 -6.277170 0.109397 2.209021 0.156368
23 173.917 192.735 86.180 0.00476 0.00003 0.00221 0.00258 0.00663 0.04192 0.364 ... 0.03772 0.06562 0.01840 20.422 0.537264 0.686894 -5.248776 0.210279 2.547508 0.253556
56 110.417 131.067 103.370 0.00784 0.00007 0.00352 0.00514 0.01056 0.03715 0.334 ... 0.02802 0.06097 0.00969 21.422 0.541781 0.821364 -4.438453 0.238298 1.922940 0.335041
175 115.380 123.109 108.634 0.00332 0.00003 0.00160 0.00199 0.00480 0.01503 0.137 ... 0.01133 0.02436 0.00401 26.005 0.405991 0.761255 -5.966779 0.197938 1.974857 0.184067
30 197.076 206.896 192.055 0.00289 0.00001 0.00166 0.00168 0.00498 0.01098 0.097 ... 0.00802 0.01689 0.00339 26.775 0.422229 0.741367 -7.348300 0.177551 1.743867 0.085569
193 198.764 396.961 74.904 0.00740 0.00004 0.00370 0.00390 0.01109 0.02296 0.241 ... 0.01588 0.03794 0.07223 19.020 0.451221 0.643956 -6.744577 0.207454 2.138608 0.123306
36 177.876 192.921 168.013 0.00411 0.00002 0.00233 0.00241 0.00700 0.02126 0.189 ... 0.01612 0.03463 0.00586 23.216 0.360148 0.778834 -6.149653 0.218037 2.477082 0.165827
167 260.105 264.919 237.303 0.00339 0.00001 0.00205 0.00186 0.00616 0.02030 0.197 ... 0.01367 0.03557 0.00910 21.083 0.440988 0.628058 -7.517934 0.160414 1.881767 0.075587
105 149.689 160.368 133.608 0.00257 0.00002 0.00116 0.00134 0.00349 0.01346 0.126 ... 0.01059 0.01979 0.00578 27.421 0.365488 0.719974 -6.997403 0.210279 1.994387 0.112878
24 163.656 200.841 76.779 0.00742 0.00005 0.00380 0.00390 0.01140 0.01659 0.164 ... 0.01497 0.02214 0.01778 23.831 0.397937 0.732479 -5.557447 0.220890 2.692176 0.215961
179 148.143 155.982 135.041 0.00392 0.00003 0.00204 0.00231 0.00612 0.01450 0.131 ... 0.01263 0.02175 0.00540 23.683 0.398499 0.778349 -5.711205 0.240875 2.845109 0.192730
1 122.400 148.650 113.819 0.00968 0.00008 0.00465 0.00696 0.01394 0.06134 0.626 ... 0.04368 0.09403 0.01929 19.085 0.458359 0.819521 -4.075192 0.335590 2.486855 0.368674
173 113.715 116.443 96.913 0.00349 0.00003 0.00171 0.00203 0.00514 0.01472 0.133 ... 0.01148 0.02245 0.00478 26.547 0.380253 0.766700 -5.943501 0.192150 1.852542 0.179677
133 118.747 123.723 109.836 0.00331 0.00003 0.00168 0.00171 0.00504 0.01043 0.099 ... 0.00903 0.01471 0.00504 25.619 0.482296 0.723096 -6.448134 0.178713 2.034827 0.141422
72 120.080 139.710 111.208 0.00405 0.00003 0.00180 0.00220 0.00540 0.01706 0.152 ... 0.01345 0.02921 0.00442 25.742 0.495954 0.762959 -5.791820 0.329066 2.205024 0.188180
160 114.238 124.393 77.022 0.00581 0.00005 0.00299 0.00316 0.00896 0.04009 0.406 ... 0.04114 0.04736 0.02073 20.437 0.653139 0.694571 -5.185987 0.259229 2.151121 0.244948
92 148.272 164.989 142.299 0.00459 0.00003 0.00250 0.00256 0.00750 0.04190 0.383 ... 0.03051 0.07150 0.01914 18.780 0.454444 0.734504 -5.952058 0.087840 2.344336 0.186489
161 115.322 135.738 107.802 0.00619 0.00005 0.00352 0.00329 0.01057 0.03273 0.325 ... 0.02931 0.04933 0.02810 19.388 0.577802 0.684373 -5.283009 0.226528 2.442906 0.238281
170 244.990 272.210 239.170 0.00451 0.00002 0.00279 0.00237 0.00837 0.01897 0.181 ... 0.01255 0.03253 0.01049 21.528 0.522812 0.646818 -7.304500 0.171088 2.095237 0.096220
140 170.368 268.796 79.543 0.00571 0.00003 0.00232 0.00269 0.00696 0.03273 0.281 ... 0.02824 0.05139 0.02485 18.540 0.677131 0.685057 -4.796845 0.397749 2.963799 0.277227
116 158.219 442.557 71.948 0.00476 0.00003 0.00214 0.00207 0.00642 0.01458 0.148 ... 0.01312 0.01818 0.01554 26.356 0.450798 0.653823 -6.051233 0.273280 2.640798 0.170106
96 159.116 168.913 144.811 0.00342 0.00002 0.00178 0.00184 0.00535 0.03381 0.307 ... 0.02809 0.05417 0.00852 22.663 0.366329 0.693429 -6.417440 0.194627 2.473239 0.151709
142 198.458 219.290 148.691 0.00376 0.00002 0.00182 0.00215 0.00546 0.03527 0.297 ... 0.02530 0.06165 0.01728 18.702 0.606273 0.661735 -5.585259 0.310746 2.465528 0.209863
162 114.554 126.778 91.121 0.00651 0.00006 0.00366 0.00340 0.01097 0.03658 0.369 ... 0.03091 0.05592 0.02707 18.954 0.685151 0.719576 -5.529833 0.242750 2.408689 0.220520
153 121.345 139.644 98.250 0.00684 0.00006 0.00388 0.00332 0.01164 0.02534 0.241 ... 0.02056 0.04019 0.04179 21.520 0.566867 0.670475 -4.865194 0.246404 2.013530 0.168581
119 217.116 233.481 93.978 0.00404 0.00002 0.00127 0.00128 0.00381 0.01299 0.124 ... 0.01075 0.02038 0.00681 24.581 0.462516 0.582710 -5.517173 0.389295 2.925862 0.220657
32 198.383 215.203 193.104 0.00212 0.00001 0.00113 0.00135 0.00339 0.01263 0.111 ... 0.00951 0.01919 0.00119 30.775 0.465946 0.738703 -7.067931 0.175181 1.512275 0.096320
184 116.848 217.552 99.503 0.00531 0.00005 0.00260 0.00346 0.00780 0.01795 0.163 ... 0.01756 0.02429 0.01179 22.085 0.663842 0.656516 -5.198864 0.206768 2.120412 0.252404
59 114.847 271.314 104.680 0.00867 0.00008 0.00373 0.00520 0.01120 0.03225 0.350 ... 0.02519 0.05414 0.01143 21.660 0.547975 0.817396 -4.609161 0.221711 1.831691 0.316395
112 204.664 221.300 189.621 0.00841 0.00004 0.00502 0.00485 0.01506 0.02378 0.210 ... 0.01767 0.03831 0.01316 21.305 0.498877 0.722085 -4.876336 0.212054 2.376749 0.268144
126 138.145 197.238 81.114 0.00544 0.00004 0.00294 0.00327 0.00883 0.02791 0.246 ... 0.02259 0.04451 0.01794 18.178 0.623209 0.738245 -5.540351 0.087165 1.821297 0.214075
188 114.563 119.167 86.647 0.00327 0.00003 0.00146 0.00184 0.00439 0.01185 0.106 ... 0.01095 0.01672 0.00703 24.775 0.555303 0.659132 -6.710219 0.149694 1.913990 0.121777
117 170.756 450.247 79.032 0.00555 0.00003 0.00244 0.00261 0.00731 0.01725 0.175 ... 0.01652 0.02270 0.01802 25.690 0.486738 0.676023 -4.597834 0.372114 2.975889 0.282780
25 104.400 206.002 77.968 0.00633 0.00006 0.00316 0.00375 0.00948 0.03767 0.381 ... 0.03780 0.05197 0.02887 22.066 0.522746 0.737948 -5.571843 0.236853 2.846369 0.219514
90 166.605 206.008 78.032 0.00742 0.00004 0.00387 0.00453 0.01161 0.06640 0.634 ... 0.05114 0.10949 0.08725 11.744 0.653410 0.733165 -4.508984 0.389232 3.317586 0.301952
141 208.083 253.792 91.802 0.00757 0.00004 0.00428 0.00428 0.01285 0.06725 0.571 ... 0.04464 0.12047 0.04238 15.648 0.606344 0.665945 -5.410336 0.288917 2.665133 0.231723
137 113.166 130.270 100.673 0.00502 0.00004 0.00257 0.00312 0.00772 0.05279 0.476 ... 0.04134 0.08689 0.03690 16.747 0.625362 0.708617 -4.654894 0.304107 2.672362 0.274387

59 rows × 22 columns

In [31]:
y_train
Out[31]:
51     0
158    1
139    1
145    1
118    1
191    0
113    1
73     1
168    0
0      1
178    1
123    1
42     0
57     1
44     0
93     1
45     0
34     0
108    1
97     1
172    0
82     1
11     1
134    1
189    0
127    1
78     1
129    1
185    0
146    1
      ..
64     0
102    1
91     1
61     0
14     1
95     1
67     1
157    1
111    1
138    1
33     0
52     0
75     1
101    1
122    1
144    1
46     0
71     1
124    1
16     1
77     1
3      1
86     1
19     1
68     1
54     1
130    1
104    1
187    0
22     1
Name: status, Length: 136, dtype: int64
In [32]:
from sklearn.tree import DecisionTreeClassifier
dt_model1 = DecisionTreeClassifier(criterion = 'entropy',random_state =1)   

# Model creation using the entropy method
In [33]:
dt_model1.fit(X_train,y_train)

# Fitting the train values inside the fit method
Out[33]:
DecisionTreeClassifier(class_weight=None, criterion='entropy', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=1,
            splitter='best')
In [34]:
# calculating the accuracy value on the test and train datas

print(dt_model1.score(X_train,y_train))    # Using the train values
print(dt_model1.score(X_test,y_test))      # Using the test values  
# 86% accuravy value is being achieved  without applying any parameter conditions 
    
1.0
0.8305084745762712
In [35]:
dt_model1 = DecisionTreeClassifier(criterion = 'entropy',max_depth = 12)   
dt_model1.fit(X_train,y_train)
Out[35]:
DecisionTreeClassifier(class_weight=None, criterion='entropy', max_depth=12,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=None,
            splitter='best')
In [36]:
print(dt_model1.score(X_train,y_train))    # Using the train values
#Training error is zero in other words 

print(dt_model1.score(X_test,y_test))      # Using the test values  
# 86% of the test records are correctly classified 
1.0
0.8305084745762712
In [37]:
# Regularizing part of the Decision Tree


for  values in range(1,15):
    dt_model1 = DecisionTreeClassifier(criterion = 'entropy',max_depth=values)
    dt_model1.fit(X_train,y_train)
    print("Training value accuracy_score for :",dt_model1.score(X_train,y_train),"Testing Value accuracy_value for :",dt_model1.score(X_test,y_test))
    
    
    # For the max_depth of 4, the accuracy values are found to be good, avoiding the overfitting condition 
Training value accuracy_score for : 0.8676470588235294 Testing Value accuracy_value for : 0.847457627118644
Training value accuracy_score for : 0.875 Testing Value accuracy_value for : 0.847457627118644
Training value accuracy_score for : 0.875 Testing Value accuracy_value for : 0.847457627118644
Training value accuracy_score for : 0.8897058823529411 Testing Value accuracy_value for : 0.7966101694915254
Training value accuracy_score for : 0.9411764705882353 Testing Value accuracy_value for : 0.8135593220338984
Training value accuracy_score for : 0.9779411764705882 Testing Value accuracy_value for : 0.8305084745762712
Training value accuracy_score for : 1.0 Testing Value accuracy_value for : 0.8305084745762712
Training value accuracy_score for : 1.0 Testing Value accuracy_value for : 0.8305084745762712
Training value accuracy_score for : 1.0 Testing Value accuracy_value for : 0.8305084745762712
Training value accuracy_score for : 1.0 Testing Value accuracy_value for : 0.8305084745762712
Training value accuracy_score for : 1.0 Testing Value accuracy_value for : 0.8305084745762712
Training value accuracy_score for : 1.0 Testing Value accuracy_value for : 0.8305084745762712
Training value accuracy_score for : 1.0 Testing Value accuracy_value for : 0.8305084745762712
Training value accuracy_score for : 1.0 Testing Value accuracy_value for : 0.8305084745762712
In [38]:
from sklearn.feature_extraction.text import CountVectorizer
In [39]:
y_predict = dt_model1.predict(X_test)
In [40]:
dt_model1 = DecisionTreeClassifier(criterion = 'entropy',max_depth = 4)   
dt_model1.fit(X_train,y_train)
Out[40]:
DecisionTreeClassifier(class_weight=None, criterion='entropy', max_depth=4,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=None,
            splitter='best')
In [41]:
print(dt_model1.score(X_train,y_train))    

print(dt_model1.score(X_test,y_test))       
0.8897058823529411
0.7966101694915254
In [97]:
from sklearn import metrics
print(metrics.confusion_matrix(y_test,y_predict))

# Dataframe creation for the heatmap 

dataframe_for_heatmap = pd.DataFrame(metrics.confusion_matrix(y_test,y_predict),index=['Healthy','Parkinsons'],columns=['Healthy','Parkinsons'])
#print(type(dataframe_for_heatmap))
print(dataframe_for_heatmap)
[[ 9  5]
 [ 5 40]]
            Healthy  Parkinsons
Healthy           9           5
Parkinsons        5          40
In [102]:
dataframe_for_heatmap
Out[102]:
Healthy Parkinsons
Healthy 9 5
Parkinsons 5 40
In [96]:
# Heatmap design
sns.heatmap(data = dataframe_for_heatmap,annot = True)
plt.xlabel('Acutal')
plt.ylabel('Predicted')
plt.show()
In [43]:
print(pd.DataFrame(dt_model1.feature_importances_, columns=['Imp'],index =X_train.columns ))
                       Imp
MDVP:Fo(Hz)       0.000000
MDVP:Fhi(Hz)      0.000000
MDVP:Flo(Hz)      0.251038
MDVP:Jitter(%)    0.000000
MDVP:Jitter(Abs)  0.000000
MDVP:RAP          0.071047
MDVP:PPQ          0.000000
Jitter:DDP        0.000000
MDVP:Shimmer      0.000000
MDVP:Shimmer(dB)  0.000000
Shimmer:APQ3      0.000000
Shimmer:APQ5      0.000000
MDVP:APQ          0.000000
Shimmer:DDA       0.000000
NHR               0.000000
HNR               0.000000
RPDE              0.000000
DFA               0.000000
spread1           0.237025
spread2           0.000000
D2                0.000000
PPE               0.440890
In [109]:
# Using min_sample_leaf as theonly parameter on the decision tree classifier 
for values in range(1,15):
    dt_model1 = DecisionTreeClassifier(criterion = 'entropy',min_samples_leaf=values)   
    dt_model1.fit(X_train,y_train)
    print("Training value accuracy_score:",dt_model1.score(X_train,y_train),"Testing Value accuracy_value:",dt_model1.score(X_test,y_test))
    
    
    
# At the values of min_samples_leaf=6, the accuracy values for the training and  test railings are found to be nearly matching and similar 
Training value accuracy_score: 1.0 Testing Value accuracy_value: 0.8305084745762712
Training value accuracy_score: 0.9852941176470589 Testing Value accuracy_value: 0.8305084745762712
Training value accuracy_score: 0.9779411764705882 Testing Value accuracy_value: 0.8135593220338984
Training value accuracy_score: 0.9705882352941176 Testing Value accuracy_value: 0.8305084745762712
Training value accuracy_score: 0.9705882352941176 Testing Value accuracy_value: 0.8305084745762712
Training value accuracy_score: 0.9705882352941176 Testing Value accuracy_value: 0.8305084745762712
Training value accuracy_score: 0.9705882352941176 Testing Value accuracy_value: 0.8305084745762712
Training value accuracy_score: 0.9705882352941176 Testing Value accuracy_value: 0.8305084745762712
Training value accuracy_score: 0.9705882352941176 Testing Value accuracy_value: 0.8305084745762712
Training value accuracy_score: 0.9558823529411765 Testing Value accuracy_value: 0.864406779661017
Training value accuracy_score: 0.9191176470588235 Testing Value accuracy_value: 0.8135593220338984
Training value accuracy_score: 0.9044117647058824 Testing Value accuracy_value: 0.847457627118644
Training value accuracy_score: 0.9044117647058824 Testing Value accuracy_value: 0.847457627118644
Training value accuracy_score: 0.9044117647058824 Testing Value accuracy_value: 0.847457627118644
In [45]:
dt_model1 = DecisionTreeClassifier(criterion = 'entropy',min_samples_leaf=6)   
dt_model1.fit(X_train,y_train)
Out[45]:
DecisionTreeClassifier(class_weight=None, criterion='entropy', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=6, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=None,
            splitter='best')
In [46]:
print(dt_model1.score(X_train,y_train))    
print(dt_model1.score(X_test,y_test)) 
print(metrics.confusion_matrix(y_test,y_predict))
0.9705882352941176
0.8305084745762712
[[ 9  5]
 [ 5 40]]

Using Ensemble Random Forest Classifier

In [69]:
from sklearn.ensemble import RandomForestClassifier
model2 = RandomForestClassifier(n_estimators=5)
model2.fit(X_train,y_train)

# 5 instances creation 
Out[69]:
RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=5, n_jobs=None,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)
In [70]:
model2.score(X_train,y_train)
Out[70]:
0.9926470588235294
In [71]:
# For training set 
test_predict = model2.predict(X_test)
model2.score(X_test,y_test)

# Everytime, when this technique is given run, they will not produce accurate results, they will keep on changing for every run
# Hence random_state seeding is required  to produce the more accurate results
# For selecting that value, the below code follows
Out[71]:
0.8983050847457628
In [108]:
from sklearn.ensemble import RandomForestClassifier
for x in range(1,14):
    model2 = RandomForestClassifier(n_estimators=x,random_state=252)
    model2.fit(X_train,y_train)
    print("Training value accuracy_score  :",model2.score(X_train,y_train))
    test_predict = model2.predict(X_test)
    print("Testing Value accuracy_value  :",model2.score(X_test,y_test))
    
    
    
# 1) For the n_estimators for value = 5, we are getting the 'best accuracy score' when compared to other values, since
# on the syntax of the RandomForestClassifier(n_estimators =10), the general value is 10 
    
Training value accuracy_score  : 0.8897058823529411
Testing Value accuracy_value  : 0.7966101694915254
Training value accuracy_score  : 0.8970588235294118
Testing Value accuracy_value  : 0.8305084745762712
Training value accuracy_score  : 0.9558823529411765
Testing Value accuracy_value  : 0.864406779661017
Training value accuracy_score  : 0.9852941176470589
Testing Value accuracy_value  : 0.9152542372881356
Training value accuracy_score  : 0.9779411764705882
Testing Value accuracy_value  : 0.9322033898305084
Training value accuracy_score  : 0.9705882352941176
Testing Value accuracy_value  : 0.864406779661017
Training value accuracy_score  : 0.9926470588235294
Testing Value accuracy_value  : 0.9152542372881356
Training value accuracy_score  : 0.9779411764705882
Testing Value accuracy_value  : 0.9322033898305084
Training value accuracy_score  : 0.9852941176470589
Testing Value accuracy_value  : 0.9152542372881356
Training value accuracy_score  : 0.9852941176470589
Testing Value accuracy_value  : 0.9322033898305084
Training value accuracy_score  : 0.9926470588235294
Testing Value accuracy_value  : 0.8983050847457628
Training value accuracy_score  : 1.0
Testing Value accuracy_value  : 0.9152542372881356
Training value accuracy_score  : 1.0
Testing Value accuracy_value  : 0.9152542372881356
In [ ]: